{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from numpy import *\n",
    "\n",
    "def loadDataSet():\n",
    "    return [[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]\n",
    "\n",
    "def createC1(dataSet):\n",
    "    C1 = []\n",
    "    for transaction in dataSet:\n",
    "        for item in transaction:\n",
    "            if not [item] in C1:\n",
    "                C1.append([item])\n",
    "                \n",
    "    C1.sort()\n",
    "    return list(map(frozenset, C1))#use frozen set so we\n",
    "                            #can use it as a key in a dict    \n",
    "\n",
    "def scanD(D, Ck, minSupport):\n",
    "    ssCnt = {}\n",
    "    for tid in D:\n",
    "        for can in Ck:\n",
    "            if can.issubset(tid):\n",
    "                if can not in ssCnt: ssCnt[can]=1\n",
    "                else: ssCnt[can] += 1\n",
    "    numItems = float(len(D))\n",
    "    retList = []\n",
    "    supportData = {}\n",
    "    for key in ssCnt:\n",
    "        support = ssCnt[key]/numItems\n",
    "        if support >= minSupport:\n",
    "            retList.insert(0,key)\n",
    "        supportData[key] = support\n",
    "    return retList, supportData\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[1, 3, 4], [2, 3, 5], [1, 2, 3, 5], [2, 5]]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "dataSet = loadDataSet()\n",
    "dataSet"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[frozenset({1}),\n",
       " frozenset({2}),\n",
       " frozenset({3}),\n",
       " frozenset({4}),\n",
       " frozenset({5})]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "C1 = createC1(dataSet)\n",
    "C1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[{1, 3, 4}, {2, 3, 5}, {1, 2, 3, 5}, {2, 5}]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "D = list(map(set, dataSet))\n",
    "D"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[frozenset({5}), frozenset({2}), frozenset({3}), frozenset({1})]"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "L1, suppData0 = scanD(D, C1, 0.5)\n",
    "L1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{frozenset({1}): 0.5,\n",
       " frozenset({3}): 0.75,\n",
       " frozenset({4}): 0.25,\n",
       " frozenset({2}): 0.75,\n",
       " frozenset({5}): 0.75}"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "suppData0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "def aprioriGen(Lk, k): #creates Ck\n",
    "    retList = []\n",
    "    lenLk = len(Lk)\n",
    "    for i in range(lenLk):\n",
    "        for j in range(i+1, lenLk): \n",
    "            L1 = list(Lk[i])[:k-2]; L2 = list(Lk[j])[:k-2]\n",
    "            L1.sort(); L2.sort()\n",
    "            if L1==L2: #if first k-2 elements are equal\n",
    "                retList.append(Lk[i] | Lk[j]) #set union\n",
    "    return retList\n",
    "\n",
    "def apriori(dataSet, minSupport = 0.5):\n",
    "    C1 = createC1(dataSet)\n",
    "    D = list(map(set, dataSet))\n",
    "    L1, supportData = scanD(D, C1, minSupport)\n",
    "    L = [L1]\n",
    "    k = 2\n",
    "    while (len(L[k-2]) > 0):\n",
    "        Ck = aprioriGen(L[k-2], k)\n",
    "        Lk, supK = scanD(D, Ck, minSupport)#scan DB to get Lk\n",
    "        supportData.update(supK)\n",
    "        L.append(Lk)\n",
    "        k += 1\n",
    "    return L, supportData"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[frozenset({5}), frozenset({2}), frozenset({3}), frozenset({1})],\n",
       " [frozenset({2, 3}), frozenset({3, 5}), frozenset({2, 5}), frozenset({1, 3})],\n",
       " [frozenset({2, 3, 5})],\n",
       " []]"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "L, suppData = apriori(dataSet)\n",
    "L"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{frozenset({1}): 0.5,\n",
       " frozenset({3}): 0.75,\n",
       " frozenset({4}): 0.25,\n",
       " frozenset({2}): 0.75,\n",
       " frozenset({5}): 0.75,\n",
       " frozenset({1, 3}): 0.5,\n",
       " frozenset({2, 5}): 0.75,\n",
       " frozenset({3, 5}): 0.5,\n",
       " frozenset({2, 3}): 0.5,\n",
       " frozenset({1, 5}): 0.25,\n",
       " frozenset({1, 2}): 0.25,\n",
       " frozenset({2, 3, 5}): 0.5}"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "suppData"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[frozenset({2, 5}),\n",
       " frozenset({3, 5}),\n",
       " frozenset({1, 5}),\n",
       " frozenset({2, 3}),\n",
       " frozenset({1, 2}),\n",
       " frozenset({1, 3})]"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "aprioriGen(L[0], 2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[frozenset({5}), frozenset({2}), frozenset({3})], [frozenset({2, 5})], []]"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "L, suppData = apriori(dataSet, minSupport=0.7)\n",
    "L"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generateRules(L, supportData, minConf=0.7):  #supportData is a dict coming from scanD\n",
    "    bigRuleList = []\n",
    "    for i in range(1, len(L)):#only get the sets with two or more items\n",
    "        for freqSet in L[i]:\n",
    "            H1 = [frozenset([item]) for item in freqSet]\n",
    "            if (i > 1):\n",
    "                rulesFromConseq(freqSet, H1, supportData, bigRuleList, minConf)\n",
    "            else:\n",
    "                calcConf(freqSet, H1, supportData, bigRuleList, minConf)\n",
    "    return bigRuleList         \n",
    "\n",
    "def calcConf(freqSet, H, supportData, brl, minConf=0.7):\n",
    "    prunedH = [] #create new list to return\n",
    "    for conseq in H:\n",
    "        conf = supportData[freqSet]/supportData[freqSet-conseq] #calc confidence\n",
    "        if conf >= minConf: \n",
    "            print(freqSet-conseq,'-->',conseq,'conf:',conf)\n",
    "            brl.append((freqSet-conseq, conseq, conf))\n",
    "            prunedH.append(conseq)\n",
    "    return prunedH\n",
    "\n",
    "def rulesFromConseq(freqSet, H, supportData, brl, minConf=0.7):\n",
    "    m = len(H[0])\n",
    "    if (len(freqSet) > (m + 1)): #try further merging\n",
    "        Hmp1 = aprioriGen(H, m+1)#create Hm+1 new candidates\n",
    "        Hmp1 = calcConf(freqSet, Hmp1, supportData, brl, minConf)\n",
    "        if (len(Hmp1) > 1):    #need at least two sets to merge\n",
    "            rulesFromConseq(freqSet, Hmp1, supportData, brl, minConf)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "L, suppData = apriori(dataSet, minSupport=0.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frozenset({5}) --> frozenset({2}) conf: 1.0\n",
      "frozenset({2}) --> frozenset({5}) conf: 1.0\n",
      "frozenset({1}) --> frozenset({3}) conf: 1.0\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[(frozenset({5}), frozenset({2}), 1.0),\n",
       " (frozenset({2}), frozenset({5}), 1.0),\n",
       " (frozenset({1}), frozenset({3}), 1.0)]"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rules = generateRules(L, suppData, minConf=0.7)\n",
    "rules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "frozenset({3}) --> frozenset({2}) conf: 0.6666666666666666\n",
      "frozenset({2}) --> frozenset({3}) conf: 0.6666666666666666\n",
      "frozenset({5}) --> frozenset({3}) conf: 0.6666666666666666\n",
      "frozenset({3}) --> frozenset({5}) conf: 0.6666666666666666\n",
      "frozenset({5}) --> frozenset({2}) conf: 1.0\n",
      "frozenset({2}) --> frozenset({5}) conf: 1.0\n",
      "frozenset({3}) --> frozenset({1}) conf: 0.6666666666666666\n",
      "frozenset({1}) --> frozenset({3}) conf: 1.0\n",
      "frozenset({5}) --> frozenset({2, 3}) conf: 0.6666666666666666\n",
      "frozenset({3}) --> frozenset({2, 5}) conf: 0.6666666666666666\n",
      "frozenset({2}) --> frozenset({3, 5}) conf: 0.6666666666666666\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "[(frozenset({3}), frozenset({2}), 0.6666666666666666),\n",
       " (frozenset({2}), frozenset({3}), 0.6666666666666666),\n",
       " (frozenset({5}), frozenset({3}), 0.6666666666666666),\n",
       " (frozenset({3}), frozenset({5}), 0.6666666666666666),\n",
       " (frozenset({5}), frozenset({2}), 1.0),\n",
       " (frozenset({2}), frozenset({5}), 1.0),\n",
       " (frozenset({3}), frozenset({1}), 0.6666666666666666),\n",
       " (frozenset({1}), frozenset({3}), 1.0),\n",
       " (frozenset({5}), frozenset({2, 3}), 0.6666666666666666),\n",
       " (frozenset({3}), frozenset({2, 5}), 0.6666666666666666),\n",
       " (frozenset({2}), frozenset({3, 5}), 0.6666666666666666)]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "rules = generateRules(L, suppData, minConf=0.5)\n",
    "rules"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "# from votesmart import votesmart\n",
    "# votesmart.votes.getBill(11820)   #API not working with python3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
